home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
cocktail
/
docme.lha
/
doc.me
/
lalr.me
< prev
next >
Wrap
Text File
|
1992-09-25
|
75KB
|
2,292 lines
.\" use: pic | tbl | eqn | ditroff -me
.\"
.\" "@(#)bibmac.me 2.2 9/9/83";
.\" Map the -ms style IP/LP macro names onto their -me equivalents
.\" so that the bibliography macros below (written for -ms) also
.\" work under the -me macro package used by this document.
.de IP
.ip \\$1 \\$2
..
.de LP
.lp
..
.\" @(#)bmac.std 2.2 9/9/83;
.\" standard format troff commands
.\" citation formatting strings
.\" (each pair [x / x] brackets a punctuation mark so refer can place
.\" punctuation correctly around bracketed citations in the text)
.ds [[ [
.ds ]] ]
.ds ], ,\|
.ds ]- -
.ds [. " \&
.ds .] .
.ds [, " \&
.ds ,] ,
.ds [? " \&
.ds ?] ?
.ds [: " \&
.ds :] :
.ds [; " \&
.ds ;] ;
.ds [! " \&
.ds !] !
.ds [" " \&
.ds "] \&"
.ds [' " \&
.ds '] '
.ds [< " \&
.ds >]
.\" reference formatting strings
.ds a] " \&
.ds b] , \&
.ds c] , \&
.ds n] "\& and \&
.ds m] "\& and \&
.ds p] .
.\" reference formatting macros
.de s[ \" start reference
.nh
.IP [\\*([F] 5m
..
.de e[ \" end reference
.[-
..
.de [] \" start to display collected references
.LP
..
.de ][ \" choose format
.\" Dispatch on which refer fields are set: [J journal name, [V volume,
.\" [B book title, [R report number, [I publisher/institution.  The
.\" selected number in register t[ invokes one of the macros 0[ .. 5[
.\" via the computed macro name on the last line.
.ie !"\\*([J"" \{\
. ie !"\\*([V"" .nr t[ 1 \" journal
. el .nr t[ 5 \" conference paper
.\}
.el .ie !"\\*([B"" .nr t[ 3 \" article in book
.el .ie !"\\*([R"" .nr t[ 4 \" technical report
.el .ie !"\\*([I"" .nr t[ 2 \" book
.el .nr t[ 0 \" other
.\\n(t[[
..
.de 0[ \" other
.\" Fields: [A author, [T title, [V volume, [O other info, [D date.
.\" Each field is printed only if non-empty, separated by commas.
.s[
.if !"\\*([A"" \\*([A\\c
.if !"\\*([T"" , \\*([T\\c
.if !"\\*([V"" , Vol. \\*([V\\c
.if !"\\*([O"" , \\*([O\\c
.if !"\\*([D"" , \\*([D\\c
\&.
.e[
..
.de 1[ \" journal article
.\" Fields: [A author, [T title, [J journal, [V volume, [N number,
.\" [D date, [P pages, [I publisher, [O other info.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J \\*([V\\fP\c
.if !"\\*([N"" ,\\*([N
.if !"\\*([D"" (\\*([D)\c
.if !"\\*([P"" , \\*([P\c
.if !"\\*([I"" , \\*([I\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 2[ \" book
.\" Author (or editor(s) when no author), italic title, then
.\" publisher/city/date accumulated in scratch string a[ so that
.\" separating commas appear only between non-empty fields.
.s[
.ie !"\\*([A"" \\*([A,
.el .if !"\\*([E"" \{\
. ie \\n([E-1 \\*([E, eds.,
. el \\*([E, ed.,\}
.if !"\\*([T"" \\fI\\*([T\\fP,
.rm a[
.if !"\\*([I"" .ds a[ \\*([I
.if !"\\*([C"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([C\}
.if !"\\*([D"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([D\}
\\*(a[.
.if !"\\*([G"" Gov. ordering no. \\*([G.
.if !"\\*([O"" \\*([O.
.e[
..
.de 3[ \" article in book
.\" Fields: [A author, [T title, [B book title, [V volume,
.\" [E editor(s), [I publisher, [C city, [D date, [P pages, [O other.
.\" Fix: the comma before the editor list is output text and must
.\" follow the .ie condition \\n([E-1 (plural test), not precede it;
.\" compare the .el branch below and the same construct in 4[.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
in \\fI\\*([B\\fP\c
.if !"\\*([V"" , vol. \\*([V
.if !~\\*([E~~ \{\
. ie \\n([E-1 , \\*([E (editors)\c
. el , \\*([E (editor)\c\}
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 4[ \" report
.\" Fields: [A author, [E editor(s), [T title, [R report number,
.\" [G gov. ordering no., [I institution, [C city, [D date, [O other.
.s[
.if !"\\*([A"" \\*([A,
.if !~\\*([E~~ \{\
. ie \\n([E-1 \\*([E, editors.
. el \\*([E, editor.\}
\\*([T,
\\*([R\c
.if !"\\*([G"" \& (\\*([G)\c
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 5[ \" conference paper
.\" Fields: [A author, [T title, [J conference name, [C city,
.\" [D date, [P pages, [O other info.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J\\fP,
.if !"\\*([C"" \\*([C,
.if !"\\*([D"" \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de [- \" clean up after yourself
.\" Remove all per-reference strings so fields of one reference
.\" cannot leak into the next reference.
.rm [A [B [C [D
.rm [E [F [G
.rm [I [J [K
.rm [N [O [P
.rm [R [T
.rm [V [W
..
.\" @(#)bmac.std 2.2 8/24/83;
.\" standard format troff commands
.\" (second, older copy of the bibliography macro package; the
.\" definitions below override the corresponding ones defined above)
.\" citation formatting strings
.ds [[ [
.ds ]] ]
.ds ], ,\|
.ds ]- -
.ds [. " \&
.ds .] .
.ds [, " \&
.ds ,] ,
.ds [< " \&
.ds >]
.\" reference formatting strings
.ds c] , \&
.ds n] "" and \&
.ds m] "" and \&
.ds a] " \&
.\" reference formatting macros
.de s[ \" start reference
.nh
.IP [\\*([F] 5m
..
.de e[ \" end reference
.[-
..
.de [] \" start to display collected references
.SH
References
.LP
..
.de ][ \" choose format
.\" Same field dispatch as in the first package: selects 0[ .. 5[.
.ie !"\\*([J"" \{\
. ie !"\\*([V"" .nr t[ 1 \" journal
. el .nr t[ 5 \" conference paper
.\}
.el .ie !"\\*([B"" .nr t[ 3 \" article in book
.el .ie !"\\*([R"" .nr t[ 4 \" technical report
.el .ie !"\\*([I"" .nr t[ 2 \" book
.el .nr t[ 0 \" other
.\\n(t[[
..
.de 0[ \" other
.\" Fields: [A author, [T title, [O other info, [D date.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
.if !"\\*([O"" \\*([O\c
.if !"\\*([D"" , \\*([D\c
\&.
.e[
..
.de 1[ \" journal article
.\" Fields: [A author, [T title, [J journal, [V volume, [N number,
.\" [D date, [P pages, [I publisher, [O other info.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J \\*([V\\fP,
.if !"\\*([N"" \\*([N
.if !"\\*([D"" (\\*([D),
.if !"\\*([P"" \\*([P\c
.if !"\\*([I"" , \\*([I\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 2[ \" book
.\" Publisher/city/date are accumulated in scratch string a[ so that
.\" separating commas appear only between non-empty fields.
.s[
.ie !"\\*([A"" \\*([A,
.el .if !"\\*([E"" \{\
. ie \\n([E-1 \\*([E, eds.,
. el \\*([E, ed.,\}
.if !"\\*([T"" \\fI\\*([T\\fP,
.rm a[
.if !"\\*([I"" .ds a[ \\*([I
.if !"\\*([C"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([C\}
.if !"\\*([D"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([D\}
\\*(a[.
.if !"\\*([G"" Gov. ordering no. \\*([G.
.if !"\\*([O"" \\*([O.
.e[
..
.de 3[ \" article in book
.\" Fields: [A author, [T title, [B book title, [V volume,
.\" [E editor, [I publisher, [C city, [D date, [P pages, [O other.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
in \\fI\\*([B\\fP,
.if !"\\*([V"" vol. \\*([V,
.if !"\\*([E"" \\*([E (ed.),
.if !"\\*([I"" \\*([I,
.if !"\\*([C"" \\*([C,
.if !"\\*([D"" \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 4[ \" report
.\" Fields: [A author, [T title, [R report number, [G gov. ordering
.\" no., [I institution, [C city, [D date, [O other info.
.s[
.if !"\\*([A"" \\*([A,
\\*([T,
\\*([R\c
.if !"\\*([G"" \& (\\*([G)\c
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
\\&.
.if !"\\*([O"" , \\*([O.
.e[
..
.de 5[ \" conference paper
.\" Fields: [A author, [T title, [J conference name, [C city,
.\" [D date, [P pages, [O other info.
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J\\fP,
.if !"\\*([C"" \\*([C\c
.if !"\\*([D"" , \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" , \\*([O.
.e[
..
.de [- \" clean up after yourself
.\" Remove all per-reference strings before the next reference.
.rm [A [B [C [D
.rm [E [F [G
.rm [I [J [K
.rm [N [O [P
.rm [R [T
.rm [V [W
..
.\" Page layout for typesetter (troff) output: A4-sized page with
.\" metric margins, 12 pt body font, and umlaut/sharp-s strings built
.\" from the \(A: .. \(ss special characters.
.if t \{ \
.pl 29.7c \" page length
.po 2.5c \" page offset (left margin)
.ll 16.5c \" line length
.lt 16.5c \" title length
.nr LL 16.5c
.nr )l 29.7c
.nr hm 2c
.nr $r 9 \" factor for vertical spacing
.nr $R \n($r
.sz 12 \" font size
.nr pp 12
.nr sp 12
.nr tp 12
.nr fp 10
.hc ~ \" hyphenation character
. \" Umlauts and sharp s
.ds A \(A:
.ds O \(O:
.ds U \(U:
.ds a \(a:
.ds o \(o:
.ds u \(u:
.ds s \(ss
. \" UMLAUT \*:u, etc.
.ds : \v'-0.6m'\h'(1u-(\\n(.fu%2u))*0.13m+0.06m'\z.\h'0.2m'\z.\h'-((1u-(\\n(.fu%2u))*0.13m+0.26m)'\v'0.6m'
.\}
.\" Page layout for nroff (terminal) output: 78-column lines and
.\" ASCII transliterations (Ae, oe, ue, sz, ...) of the umlauts.
.if n \{ \
.po 0 \" page offset (left margin)
.ll 78 \" line length
.lt 78 \" title length
.nr $r 4 \" factor for vertical spacing
.nr $R \n($r
.hc ~ \" hyphenation character
. \" Umlauts and sharp s
.ds A Ae
.ds O Oe
.ds U Ue
.ds a ae
.ds o oe
.ds u ue
.ds s sz
.\}
.\" _ : underline argument 1 (optionally followed by argument 2)
.de _
\&\\$1\l'|0\(ul'\\$2
..
.\" FT/FR: switch to constant-width font for program text and back
.de FT \" font for programs
.ft C
.sz -2
..
.de FR
.ft R
.sz +2
..
.\" Redefine []: print a heading before the collected references
.de [] \" start to display collected references
.uh References
.lp
..
.de $0 \" collect table of contents
.\" Called after each section heading: $1 = title, $2 = section number.
.(x
.ta 2c
.ie '\\$2'' \\$1
.el \\$2. \\$1
.)x
..
.\" np: numbered paragraph, counter kept in register $p
.de np
.nr $p +1
.ip \\n($p.
..
.\" -ms compatibility wrappers expressed in -me requests
.de SH
.sp 0.5
.in -3
.r \\$1
.sp 0.5
.in +3
..
.de PP
.sp 0.5
..
.de IP
.ip \\$1 \\$2
..
.de I
.i \\$1
..
.\" TH: table heading, deliberately a no-op here
.de TH
..
.EQ
delim off
gsize 10
gfont R
.EN
.b " "
.sp 1c
.ta 9c
.ft R
.sz 12
\l'17.1c'
.nf
Lalr - A Generator for
Efficient Parsers
J. Grosch
\l'17.1c'
.sp 12.5c
\l'17.1c'
.ft H
.nf
GESELLSCHAFT F\*UR MATHEMATIK
UND DATENVERARBEITUNG MBH
FORSCHUNGSSTELLE F\*UR
PROGRAMMSTRUKTUREN
AN DER UNIVERSIT\*AT KARLSRUHE
.r
\l'17.1c'
.bp
.oh ''LR-Parser'%'
.eh ''LR-Parser'%'
.ce 99
.sz 20
.b " "
.sp 2
Project
.sp
.b "Compiler Generation"
.sp
.sz 12
\l'15c'
.sp
.sz 16
.b "Lalr - A Generator for Efficient Parsers"
.sp 2
Josef Grosch
.sp 2
.sz 14
Oct. 7, 1988
.sp
.sz 12
\l'15c'
.sp 2
Report No. 10
.sp 2
Copyright \(co 1988 GMD
.sp 2
Gesellschaft f\*ur Mathematik und Datenverarbeitung mbH
Forschungsstelle an der Universit\*at Karlsruhe
Vincenz-Prie\*snitz-Str. 1
D-7500 Karlsruhe
.ce 0
.bp
.ds R \(rh
.\" ds R \(RA
.\" Redefine FT (font for programs): constant-width, smaller size,
.\" and reduced vertical spacing for compact code displays.
.de FT
.ft C
.sz -2
.vs 10
..
.\" Redefine the -me section-heading hook $p: $1 = title, $3 = depth.
.\" Depth '' or '1': centered heading, title forced to upper case by
.\" temporarily remapping the lower-case letters with .tr; depth '2':
.\" bold run-in heading.
.de $p
.sp 1.5
.ne 2c
.ps 10
.if '\\$3'' \{\
.ce
.tr aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ
\fR\\$1\fP
.tr aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz
.\}
.if '\\$3'1' \{\
.ce
.tr aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ
\fR\\$1\fP
.tr aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz
.\}
.if '\\$3'2' \{\
\fB\\$1\fP
.\}
.ps
.sp 0.1
..
.nr pp 10
.nr sp 10
.nr tp 10
.\" nr $r 5.3 \" factor for vertical spacing
.nr $r 12 \" factor for vertical spacing
.nr $R \n($r
.sz 10
.ce 99
.sz 12
.b "Lalr - A Generator for Efficient Parsers"
.sz 10
.sp
J. Grosch
.i
GMD Forschungsstelle an der Universit\*at Karlsruhe
Vincenz-Prie\*snitz-Str. 1, D-7500 Karlsruhe, Germany
grosch@karlsruhe.gmd.de
.he '''%'
.bp 1
.b
SUMMARY
.ce 0
.lp
.i Lalr
.b
is a parser generator that generates very fast and powerful parsers.
The design goals have been to generate portable, table-driven parsers that
are as efficient as possible and which include all the features needed for
practical applications. Like \fIYacc\fP it accepts LALR(1) grammars,
resolves ambiguities with precedence and associativity of operators,
generates table-driven parsers, and has a mechanism for S-attribution.
Unlike \fIYacc\fP it allows grammars to be written in extended BNF,
includes automatic error reporting, recovery, and repair, and
generates parsers in C or Modula-2.
In case of LR-conflicts, a derivation tree is printed instead of the
involved states and items in order to aid the location of the problem.
The parsers are two to three times faster than those of \fIYacc\fP.
Using a MC 68020 processor, 35,000 tokens per second or 580,000 lines per
minute can be parsed. The sources of Lalr exist in C and in Modula-2.
We describe in detail the development steps of the generated parsers.
We show how software engineering methods like pseudo code and stepwise
refinement can turn a parsing algorithm from a textbook into a complete and
efficient implementation. We present the details of the
generated parsers and show how the performance is achieved with a relatively
simple and short program. We discuss important features of the generator
and finally present a comparison of some parser generators.
.sp
.lp
KEY WORDS syntactic analysis parser generator LALR(1) grammar
.sh 1 Introduction
.pp
The parser generator \fILalr\fP has been developed with the aim of combining
a powerful specification technique for context-free languages with the
generation of highly efficient parsers. As the parser generator processes
the class of LALR(1) grammars, we chose the name \fILalr\fP to express
the power of the specification technique.
.pp
The grammars may be written using extended BNF constructs. Each grammar rule
may be associated with a semantic action consisting of arbitrary statements
written in the target language. Whenever a grammar rule is recognized by the
generated parser, the associated semantic action is executed. A mechanism for
S-attribution (only synthesized attributes) is provided to allow
communication between the semantic actions.
.pp
In case of LR-conflicts a derivation tree is printed to aid the location of
the problem. The conflict can be resolved by specifying precedence and
associativity for terminals and rules. Syntactic errors are handled fully
automatically by the generated parsers including error reporting, recovery,
and repair. The mentioned features are discussed in more detail in one of
the following sections.
.pp
The generated parsers are table-driven. The comb-vector technique\*([<\*([[ASU86\*(]]\*(>]
is used to compress the parse tables. The sources of the generator \fILalr\fP
exist in the languages C and Modula-2. Parsers can be generated in the
languages C and Modula-2, too. The generator uses the algorithm described by
DeRemer and Pennello\*([<\*([[DeP82\*(]]\*(>]
to compute the look-ahead sets.
.\" although the algorithm published by .[ives.] promises to perform better.
Currently \fILalr\fP runs on several workstations under UNIX.
.pp
Parsers generated by \fILalr\fP are two to three times as fast as
\fIYacc\fP\*([<\*([[Joh75\*(]]\*(>] generated
ones. They reach a speed of 35,000 tokens per second or 580,000 lines per
minute on a MC 68020 processor, excluding the time for scanning.
The sizes of the parsers are 25 to 40% larger than those produced by
\fIYacc\fP (resulting in e. g. 37 KB for Ada).
The reason is mainly due to additional code for error recovery, as well as
a small space penalty for the increase of speed.
.\" .pp
.\" Many compiler specialists from industry believe that automatically generated
.\" parsers are not efficient enough for production compilers.
.\" They have been right in the past.
.\" We expect to improve the situation with
.\" .i Lalr,
.\" a parser generator that generates very fast parsers.
.\" Like \fIYacc\fP\*([<\*([[Joh75\*(]]\*(>] it accepts LALR(1) grammars,
.\" resolves ambiguities with precedence and associativity of operators,
.\" generates table-driven parsers, and
.\" has a mechanism for S-attribution.
.\" Unlike \fIYacc\fP it allows grammars to be written in extended BNF,
.\" includes automatic error recovery, error reporting, and error repair, and
.\" generates parsers in C or Modula-2.
.\" The parsers are twice as fast as those of \fIYacc\fP.
.\" Using a MC 68020 processor nearly 580,000 lines per minute can be parsed.
.\" Lalr is implemented in Modula-2.
.\" .pp
.\" Pennello\*([<\*([[Pen86\*(]]\*(>] reports a speed-up of 10 for LR-parsers through a
.\" translation of the parse tables into assembly code.
.\" A disadvantage lies in the increase of the code size.
.\" .pp
.\" For large grammars the advantage of Pennellos implementation is lost as
.\" reported in\*([<\*([[KlM89\*(]]\*(>].
.\" In this case an efficient table-driven parsers is
.\" superior both in time and space. Speed-up numbers have to be treated with
.\" caution, too, because they heavily depend on the object chosen for
.\" comparison. Every number can be reached by an "appropriate" choice.
.pp
Recently some researchers report that very fast LR parsers can be achieved
by generating direct code, in which the parse tables are converted
into executable statements.
Pennello\*([<\*([[Pen86\*(]]\*(>] generates assembly code and reports that the
resulting parsers run six to ten times faster than
table-driven parsers generated by an unspecified generator.
He measured a speed of 500,000 lines per minute on a computer similar to a
VAX 11/780 and 240,000 lines per minute on an Intel 80286.
A disadvantage of this
solution is the increase of the parser size by a factor of two to four,
mainly because a second parser is needed for error recovery.
.pp
Whitney and Horspool\*([<\*([[HoW89\*(],WhH88\*(]]\*(>]
generate C code and report the generation of parsers between five and seven
times faster than those produced by \fIYacc\fP,
resulting in a speed of 95,500 to 142,000 tokens
per second or 700,000 to 2 million lines per minute for parsers for C and
Pascal. The parser size lies in the same range of \fIYacc\fP. Currently,
error recovery, S-attribution, and semantic actions are not provided.
.\" Obviously the speed-up factor and the absolute speed are contradictory to
.\" our results.
In the last section, we discuss the problems with this kind of
measurement and conclude that the results are not comparable.
.pp
Currently, table-driven parsers and direct code schemes are hard to compare.
First, direct code schemes do not as yet implement error recovery,
S-attribution, or semantic actions.
However, for realistic applications these features are necessary and of
course cost some time (see below).
Second, the speed of the direct code
schemes decreases with the grammar size. We also made experiments with
direct code parsers\*([<\*([[KlM89\*(]]\*(>] and found for example in the case
of Ada that an efficient table-driven parser was superior both in speed and
space.
A further problem is that the code directly generated for large grammars may
be too big to be translated under the restrictions of usual compilers.
.pp
According to A\*smann\*([<\*([[Ass88\*(]]\*(>] a typical compiler
spends 25% of the time for lexical analysis, 15% for syntactic analysis, 35%
for symbol table handling and semantic analysis, and 25% for code generation.
In our own experiments, syntactic analysis took 5 to 10% when all
compiler parts were constructed as efficiently as possible.
Therefore, improving the speed of the syntactic analysis phase can reduce
the total compilation time by only a few percent. However, an engineer wants
all the compiler parts to be as good as possible. An engineer also wants a
parser generator to have all the features needed for practical
applications, such as error recovery, S-attribution, and semantic actions.
.pp
From this engineering point of view we decided to follow the table-driven
approach. This avoids the disadvantages of the direct code parsers like
assembly code, large parsers, and trouble with compiler restrictions for big
programs. On the other hand, fast table-driven parsers can be generated, and
can include error recovery, S-attribution, and semantic actions
with as few space and time expenses as possible.
.pp
This paper shows that a speed-up between two and three times faster than
\fIYacc\fP is possible using a table-driven implementation programmed in C.
With this approach, the code size increases only by 25 to 40%, mainly because
of added features like error recovery.
The improvement is possible by carefully designing the encoding of the
parser actions, the details of the parsing algorithm, and the structure of
the parse tables.
In the following we will discuss the implementation of the generated parsers
as well as some important features of the generator \fILalr\fP and
present a comparison to other parser generators.
.sh 1 "The Generated Parser"
.pp
This section describes the parsers generated by \fILalr\fP.
We develop the parsing
algorithm step by step as given below. We will use a pseudo code notation
except in the last step where we introduce C code.
.(b
.vs 12
- basic LR-parsing algorithm
- LR(0) reductions
- encoding of the table entries
- semantic actions and S-attribution
- table representation and access
- error recovery
.\" - implementation issues
- mapping pseudo code to C
.)b
.pp
To be able to formally handle scanners, stacks, and grammars, we assume
three modules. We only give the headings of the exported procedures
consisting out of the procedure name and the types of the parameters and
results.
.(b
.vs 12
.FT
MODULE scanner
PROCEDURE GetToken (): Vocabulary
PROCEDURE GetAttribute (): <any type>
.)b
.lp
Each call of the function GetToken yields the next token of the input. If
the input is read completely, GetToken yields the special value EndOfInput.
A call of the function GetAttribute returns the attributes of the current
token, such as symbol tables indices for identifiers or values of numbers.
.(b
.vs 12
.FT
MODULE stack
PROCEDURE Push (Stack, Element)
PROCEDURE Pop (Stack): Element
PROCEDURE Top (Stack): Element
.)b
.lp
A stack is defined as usual.
.(b
.vs 12
.FT
MODULE grammar
PROCEDURE Length (Productions): Integer
PROCEDURE LeftHandSide (Productions): Nonterminals
PROCEDURE Semantics (Productions): <action statements>
.)b
.lp
The function Length returns the length of the right-hand side of a
production. The function LeftHandSide returns the nonterminal on the
left-hand side of a production. The function Semantics maps every production
to some action statements. These statements should be executed whenever the
associated production is recognized or reduced.
.sh 2 "Basic LR-Parsing Algorithm"
.pp
We start by looking in a textbook on compiler construction\*([<\*([[ASU86\*(],WaG84\*(]]\*(>].
(Following\*([<\*([[DeP82\*(]]\*(>] we use the notion
\fIread\fP action for what is usually called \fIshift\fP action.)
An LR-Parser is controlled by a parse
table implementing the following function
.(b
Table : States \(mu Vocabulary \(-> Actions
.)b
where
.(b
Actions = ( read \(mu States ) \(cu ( reduce \(mu Productions ) \(cu { halt, error }
.)b
.(z L
.vs 12
\fBAlgorithm 1:\fP Basic LR parser
.FT
BEGIN
Push (StateStack, StartState)
Terminal := GetToken ()
.sp 0.5
LOOP
CASE Table (Top (StateStack), Terminal) OF
.sp 0.5
read t: Push (StateStack, t)
Terminal := GetToken ()
.sp 0.5
reduce p: FOR i := 1 TO Length (p) DO
State := Pop (StateStack)
END
Nonterminal := LeftHandSide (p)
.sp 0.5
CASE Table (Top (StateStack), Nonterminal) OF
read t: Push (StateStack, t)
END
.sp 0.5
error: ErrorRecovery ()
.sp 0.5
halt: EXIT
.sp 0.5
END
END
END
.)z
.pp
The table controls a general LR parsing algorithm (Algorithm 1).
This is a pushdown
automaton which remembers the parsing of the left context in a stack of
states. Depending on the state on top of the stack and on the actual
look-ahead symbol, it accesses the parse table and executes the obtained action.
.pp
There are two places where the table is accessed. Depending on the second
argument of the function Table, we will call these places
.i terminal
and
.i nonterminal
accesses. At a terminal access, all four actions are possible.
Nonterminals appear after
reductions, only a read action is possible at a nonterminal access: no
error action can occur. Nevertheless we use a CASE statement at a
nonterminal access to decode the action, because there will be more cases
in the next step.
.sh 2 "LR(0) Reductions"
.pp
The textbooks also tell us about LR(0) reductions or read-reduce actions
\*([[WaG84\*(]].
For most languages 50% of the states are LR(0) reduce states, in which a
reduce action is determined without examining the look-ahead token.
The introduction of a read-reduce action
is probably one of the best available optimizations.
This saves many table accesses and considerable table space.
.(b
Actions = ( read \(mu States ) \(cu ( reduce \(mu Productions ) \(cu ( read-reduce \(mu Productions ) \(cu { halt, error }
.)b
.(z L
.vs 12
\fBAlgorithm 2:\fP LR parser with LR(0) reductions
.FT
BEGIN
Push (StateStack, StartState)
Terminal := GetToken ()
.sp 0.5
LOOP
CASE Table (Top (StateStack), Terminal) OF
.sp 0.5
read t: Push (StateStack, t)
Terminal := GetToken ()
.sp 0.5
read-reduce p: Push (StateStack, _)
Terminal := GetToken ()
GOTO L
.sp 0.5
reduce p: L: LOOP
FOR i := 1 TO Length (p) DO
State := Pop (StateStack)
END
Nonterminal := LeftHandSide (p)
.sp 0.5
CASE Table (Top (StateStack), Nonterminal) OF
.sp 0.5
read t: Push (StateStack, t)
EXIT
.sp 0.5
read-reduce p: Push (StateStack, _)
.sp 0.5
END
END
.sp 0.5
error: ErrorRecovery ()
.sp 0.5
halt: EXIT
.sp 0.5
END
END
END
.)z
.pp
As we did not find an LR parsing algorithm that uses read-reduce actions in
the literature we present our version in Algorithm 2.
The character '_' stands for
a value that doesn't matter. A read-reduce action can occur at both places
of table access. In the terminal case, we combine a read and a reduce
action. In the nonterminal case we have to "virtually" read the nonterminal
and then to execute a reduction. This is accomplished by the inner LOOP
statement. A
reduce action can be followed by a series of read-reduce actions. The inner
LOOP statement is terminated on reaching a read action.
.pp
This solution with an inner LOOP statement has two advantages: First, as there are
only two cases within the second CASE statement, it can be turned into an
IF statement. Second, there is no need to differentiate between read and
read-reduce with respect to terminal or nonterminal table access, as these
different kinds of access occur at two different places.
.sh 2 "Encoding of the Table Entries"
.pp
The next problem is how to efficiently represent the table entries.
Conceptually, these entries are pairs consisting of an action indicator and a
number denoting a state or a production. A straightforward representation
using a record wastes too much space and is too hard to decode for
interpretation. It is advantageous to represent the table entries by simple
integers in the following way:
.(b
Table : States \(mu Vocabulary \(-> INTEGER
.)b
where
.(b
.vs 12
.TS
;
l l l.
action integer value constant name
_
error 0 NoState
read 1 1
\&...
read n n
read-reduce 1 n + 1 FirstReadReduce
\&...
read-reduce m n + m
reduce 1 n + m + 1 FirstReduce
\&...
reduce m n + m + m
halt n + m + m + 1 Stop
.TE
.)b
.pp
The constant n stands for the number of states and the constant m
for the number of productions. As it is not possible to "read-reduce" every
production, not all numbers between n+1 and n+m are used.
The advantage of this solution is that the table entries do not take much
space, and that to decode them the CASE statements can be turned into three
simple comparisons as shown in Algorithm 3.
We neglect the actions error and halt for the moment, and reintroduce them in
later sections.
.(z L
.vs 12
\fBAlgorithm 3:\fP LR parser with actions encoded by integers
.FT
BEGIN
Push (StateStack, StartState)
Terminal := GetToken ()
.sp 0.5
LOOP
State := Table (Top (StateStack), Terminal)
.sp 0.5
IF State >= FirstReadReduce THEN /* reduce or read-reduce? */
.sp 0.5
IF State < FirstReduce THEN /* read-reduce */
Push (StateStack, _)
Terminal := GetToken ()
Production := State - FirstReadReduce + 1
ELSE /* reduce */
Production := State - FirstReduce + 1
END
.sp 0.5
LOOP /* reduce */
FOR i := 1 TO Length (Production) DO
State := Pop (StateStack)
END
Nonterminal := LeftHandSide (Production)
.sp 0.5
State := Table (Top (StateStack), Nonterminal)
.sp 0.5
IF State < FirstReadReduce THEN /* read */
Push (StateStack, State)
EXIT
ELSE /* read-reduce */
Push (StateStack, _)
END
END
.sp 0.5
ELSE /* read */
Push (StateStack, State)
Terminal := GetToken ()
END
END
END
.)z
.\" .pp
.\" In practice read-reduce actions will not occur for all productions. In this
.\" sense we are wasting integer codes. But as we chose to differentiate between
.\" reduce and read-reduce by a test whether odd or even we have no other way.
.sh 2 "Semantic Actions and S-Attribution"
.pp
For the implementation of an S-attribution which is evaluated during LR parsing,
the parser has to maintain a second stack for the attribute values.
This stack grows and shrinks in parallel with the existing stack for the states.
Algorithm 4 shows how these features have to be added.
.(z L
.vs 12
\fBAlgorithm 4:\fP LR parser with action statements and S-attribution
.FT
BEGIN
Push (AttributeStack, _)
Push (StateStack, StartState)
Terminal := GetToken ()
.sp 0.5
LOOP
State := Table (Top (StateStack), Terminal)
.sp 0.5
IF State >= FirstReadReduce THEN /* reduce or read-reduce? */
.sp 0.5
IF State < FirstReduce THEN /* read-reduce */
Push (AttributeStack, GetAttribute ())
Push (StateStack, _)
Terminal := GetToken ()
Production := State - FirstReadReduce + 1
ELSE /* reduce */
Production := State - FirstReduce + 1
END
.sp 0.5
LOOP /* reduce */
FOR i := 1 TO Length (Production) DO
Dummy := Pop (AttributeStack)
State := Pop (StateStack)
END
Nonterminal := LeftHandSide (Production)
.sp 0.5
Semantics (Production) ()
.sp 0.5
State := Table (Top (StateStack), Nonterminal)
.sp 0.5
IF State < FirstReadReduce THEN /* read */
Push (AttributeStack, SynAttribute)
Push (StateStack, State)
EXIT
ELSE /* read-reduce */
Push (AttributeStack, SynAttribute)
Push (StateStack, _)
END
END
.sp 0.5
ELSE /* read */
Push (AttributeStack, GetAttribute ())
Push (StateStack, State)
Terminal := GetToken ()
END
END
END
.)z
.pp
In order to be able to access the attributes of all right-hand side symbols, we
need a stack with direct access, because the attribute for the i-th symbol
(counting from 1) has to be accessed by
.(b
.FT
AttributeStack [StackPointer + i]
.)b
The action statement can compute an attribute value for the left-hand side
of the production which has to be assigned to the variable SynAttribute (for
a synthesized attribute). After executing the action statements, this value
is pushed onto the attribute stack by the parser to reestablish the invariant
of the algorithm. The attribute values for terminals have to be provided by
the scanner.
.pp
As many of the terminals don't bear any attribute, most of the associated
Push operations could be replaced by pushing a dummy value: that is, an
increment of the stack pointer would be enough. To be able to distinguish
between two kinds of Push operations, two kinds of read actions would be
necessary. To make the decision would cost an extra check for every token.
We did not use this optimization because we believe that the
extra checks cost as much as the saved assignments.
.\" .sh 2 "Partial Evaluation of the Reduce Actions"
.pp
How do we implement the mapping of a production to the associated
action statements? Of course, the natural
.\" and probably only
solution is a CASE
statement. The access to the Length and the LeftHandSide also depends on
the production. One choice would be to access two arrays. As array access is
relatively expensive, we can move these computations into the CASE statement
which we already need for the action statements. In each case, these
computations are turned into constants. The FOR loop disappears anyway,
because it suffices to decrement the stack pointers. The code common to all
reductions is not included in the CASE statement but follows afterwards.
We also factor out the code common to all parts of the IF statement at the
end of each reduction (Algorithm 5).
.(z L
.vs 12
\fBAlgorithm 5:\fP LR parser with CASE statement
.FT
BEGIN
Push (AttributeStack, _)
Push (StateStack, StartState)
Terminal := GetToken ()
.sp 0.5
LOOP
State := Table (Top (StateStack), Terminal)
.sp 0.5
IF State >= FirstReadReduce THEN /* reduce or read-reduce? */
.sp 0.5
IF State < FirstReduce THEN /* read-reduce */
Push (AttributeStack, GetAttribute ())
Push (StateStack, _)
Terminal := GetToken ()
END
.sp 0.5
LOOP /* reduce */
CASE State OF
.sp 0.5
Stop: HALT
.sp 0.5
...
.sp 0.5
p+FirstReadReduce-1, p+FirstReduce-1:
.sp 0.5
FOR i := 1 TO Length (p) DO
Dummy := Pop (AttributeStack)
State := Pop (StateStack)
END
Nonterminal := LeftHandSide (p)
Semantics (p) ()
.sp 0.5
q+FirstReadReduce-1, q+FirstReduce-1:
.sp 0.5
AttributeStackPointer -:= m
StateStackPointer -:= m
Nonterminal := n
.sp 0.5
<action statements for q> /* Semantics (q) */
.sp 0.5
...
.sp 0.5
END
.sp 0.5
State := Table (Top (StateStack), Nonterminal)
Push (AttributeStack, SynAttribute)
Push (StateStack, State)
.sp 0.5
IF State < FirstReadReduce THEN EXIT END /* read */
END
.sp 0.5
ELSE /* read */
Push (AttributeStack, GetAttribute ())
Push (StateStack, State)
Terminal := GetToken ()
END
END
END
.)z
.pp
Parts of the code in the case alternatives can be evaluated during generation time.
In Algorithm 5, p and q stand for arbitrary productions. Whereas in the case
of p the code has just been carried over from Algorithm 4, the case of q
contains the code after applying constant folding: the FOR loop
reduces to a decrement of the stack pointers and the precomputation of the
left-hand side of q yields a constant:
.(b
m = Length (q)
n = LeftHandSide (q)
.)b
.pp
The two stacks could use one common stack pointer if this were an array
index. As we want to arrive at real pointers in a C implementation, we have
to distinguish between the two stack pointers.
.pp
The halt action (Stop) can be treated as a special case of a reduction. It
occurs when the production S' \(-> S # is recognized. We have augmented the
given grammar by this production where
.(b
.ta 1c
S is the original start symbol
S' is the new start symbol
# is the end of input token
.)b
By this we assure that the complete input is parsed and checked for
syntactical correctness.
.sh 2 "Table Representation and Access"
.pp
After developing the principal algorithm for LR-parsing, the question of
how to implement the function Table has to be discussed before we turn to
the details of the implementation. The most natural solution might be to
use a two-dimensional array. For large languages like Ada this array would
become quite big:
.(b
( 95 terminals + 252 nonterminals ) * 540 states * 2 bytes = 374,760 bytes
.)b
This amount may be bearable with today's main memory capacities. However, we
have chosen the classical solution of compressing the sparse matrix.
Using this compression the storage required for the Ada parser is reduced to
22,584 bytes, including additional information for error recovery. The
decision of how to compress the table has to take into account the desired
storage reduction and the cost of accessing the compressed data structure.
.pp
An advantageous compression method is the comb-vector
technique described in\*([<\*([[ASU86\*(]]\*(>] which is used for example in
\fIYacc\fP\*([<\*([[Joh75\*(]]\*(>] and in the latest version of PGS\*([<\*([[KlM89\*(]]\*(>].
This technique combines very good compression quality with fast access.
The rows (or columns) of a sparse matrix are merged into a single vector
called Next. Two additional vectors called Base and Check are necessary to
accomplish the access to the original data. Base contains for every row
(column) the offset where it starts in Next. Check contains for every entry
in Next the original row (column) index. The resulting data structure
resembles the merging of several combs into one. The optional combination
with a fourth array called Default allows further compression of the array,
as parts common to more than one row (column) can be factored out.
Algorithm 6 shows how to access the compressed data structure if rows are
merged. For more details see\*([<\*([[ASU86\*(]]\*(>].
.(b L
.vs 12
.sp 0.5
\fBAlgorithm 6:\fP access to the compressed table (comb-vector)
.FT
PROCEDURE Table (State, Symbol)
LOOP
IF Check [Base [State] + Symbol] = State THEN
RETURN Next [Base [State] + Symbol]
ELSE
State := Default [State]
IF State = NoState THEN RETURN error END
END
END
END
.)b
.pp
With this kind of compression it is possible to reduce the table size
to less than 10%. The space and time behaviour can be improved even more,
yielding in the case of Ada a size reduction of 6%.
Algorithm 6 may return the value "error". However, if
Symbol is a nonterminal, no errors can occur, as nonterminals are computed
during reductions and are always correct. Therefore, in the case of
nonterminal access, if the vector Default is omitted, the vector Check can be
omitted, too. In order to implement this it is necessary to split the function
Table in two parts, one for terminal and one for nonterminal access:
.(b
.TS
;
l l l.
TTable : States \(mu Terminals \(-> INTEGER
NTTable : States \(mu Nonterminals \(-> INTEGER
.TE
.)b
.pp
The terminal Table TTable is accessed as before using Algorithm 6.
The access to the nonterminal Table NTTable can be simplified as shown in
Algorithm 7. Only the vectors NTNext and NTBase are necessary in this case.
.(b L
.vs 12
.sp 0.5
\fBAlgorithm 7:\fP access to the nonterminal table
.FT
PROCEDURE NTTable (State, Symbol)
RETURN NTNext [NTBase [State] + Symbol]
END
.)b
.pp
Splitting the table into two parts has several advantages: the storage
reduction is improved because the two parts pack better if handled
independently. The storage reduction by omitting Default and Check is
greater than using these two vectors. The table access time in the
nonterminal case is improved significantly.
.pp
A further possibility to reduce space is to make the arrays Next and Check
only as large as the significant entries require. Then a check has to be
inserted to see if the expression 'Base [State] + Symbol' is within the array
bounds. We decided to save this check by increasing the arrays to a size
where no bounds violations can occur.
.sh 2 "Error Recovery"
.pp
The error recovery of \fILalr\fP
.\" and its connection to the parsing algorithm
.\" is only described in principle due to the limited space.
.\" We follow
follows the complete backtrack-free method described by
\*([[R\*oh76\*(],R\*oh80\*(],R\*oh82\*(]].
The error recovery used by \fILalr\fP is completely automatic and includes
error reporting, recovery, and repair.
From the user's point of view it works as described in a later section.
.pp
There is only one place where an error can occur: in an access to the
terminal table where
the lookahead token is not a legal continuation of the program
recognized so far. The algorithm for error recovery replaces in the
access of the terminal table (Algorithm 6) the return of the value "error".
The parsing algorithm has two modes: the \fIregular mode\fP and the
\fIrepair mode\fP used during error repair. A problem is
that during repair mode reductions and the associated semantic actions have
to be executed. To avoid duplication of this code we use the same code
during both modes. In order to avoid the immediate generation of new errors
in repair mode error messages and skipping of tokens
are disabled in this mode. Repair mode is exited when we can
accept a token at one of the two places where tokens are read.
We add an instruction at these places to leave repair mode.
.pp
It might be argued that this additional instruction to leave repair mode
could be avoided. This is true, if either the code for reductions and
semantic actions were duplicated or turned into a procedure which could be
called twice. The first solution would increase the code size significantly.
This is avoided in the second solution, but we have to pay for a
procedure call at every reduction. This is more expensive than a simple
assignment to change the mode for every input token, because the number of
input tokens is usually about the same as the number of reductions.
.if 0 \{\
.sh 2 "Implementation Issues"
.pp
After developing the principal algorithms for LR-parsing, table access, and
error recovery this chapter summarizes the implementation issues discussed
so far:
.ip -
Allowing ambiguous grammars and using precedence and associativity to solve
LR-conflicts, makes the majority of chain productions unnecessary and saves
the according
reduce actions. This kind of elimination of chain productions improves
the run time behaviour without the disadvantage of increasing the table size.
.ip -
The introduction of read-reduce actions saves for up to 50% of the
reduce actions one table access and the decoding of the table entry.
.ip -
The table entries are simple integers which consume little space and are
easy to decode and interpret.
In comparison to records with two fields as entries only one "field" has to
be accessed. In comparison to "bit stuffing" schemes using logical
instructions or
multiplication to code two numbers into one there is no need to decode by
masking or division.
.ip -
The separation of the terminal and the nonterminal table access means that 3
kinds of table entries are enough. This allows simple decoding.
.ip -
To decode the table entries 3 simple comparisons with integer constants
suffice, there is no need for expensive CASE statements.
.ip -
No procedure call is necessary to execute semantic actions, as these are
inserted in-line.
.ip -
Using partial evaluation to access the length and the left-hand side of
productions two accesses to arrays can be turned into two constants.
.ip -
Implementing the tables with the comb-vector technique combines good storage
compression with fast access. Splitting the table into a terminal and a
nonterminal part allows further improvements.
.ip -
Treating the halt action as a special case of a reduction results in no
costs to detect this action.
.ip -
The detection of the error action is for free, as this check is already
necessary to access the terminal table.
.ip -
Using the regular parsing loop during error recovery seems to be a good
compromise. A simple assignment statement executed for
each input token avoids duplication of the semantic actions or the cost
of a procedure call.
.ip -
Of course, the procedures of algorithms 6 and 7 are inserted in-line.
.ip -
Most compilers translate IF statements so that the ELSE part is more
efficient to execute than the THEN part. That is why we have placed the read
action in an ELSE part as it seems to be a little bit more probable than a
reduce action.
.lp
.\}
.sh 2 "Mapping Pseudo Code to C"
.pp
The remaining implementation decisions are how to map the pseudo code
instructions into real programming language statements. This concerns
primarily the operations Push and Top and the table access.
The following arguments
hold only for the language C, as things are different in Modula-2.
.pp
The communication of the attribute value of a token from the scanner is not
done by the procedure GetAttribute but by the global variable Attribute.
.pp
Stacks are implemented as arrays which are administered by a stack pointer.
If the stack pointer is a register variable a Push operation could be
accomplished by one machine instruction on an appropriate machine if auto
increment is used. We preferred post increment, because we use a MC 68020
processor.
.(b
.FT
Push (AttributeStack, Attribute) \*R * yyAttrStackPtr ++ = Attribute;
.)b
.pp
The operation Top turns into dereferencing a pointer.
.(b
.FT
Top (StateStack) \*R * yyStateStackPtr
.)b
.pp
A dummy value is easily pushed by an increment instruction.
.(b
.FT
Push (StateStack, _) \*R yyStateStackPtr ++;
.)b
.pp
The vector NTBase does not contain integer values but direct pointers into
the vector NTNext to indicate where the rows start in NTNext.
These pointers are initialized after loading of the program. This saves the
addition of the start address of NTNext during the outer array access.
The start address is already preadded to the elements of the vector NTBase.
This kind of access is probably cheaper than the access to an uncompressed
two-dimensional array which usually involves multiplication.
The same technique is applied to the vector Base of the terminal table.
.(b
.FT
State := NTNext [NTBase [Top (StateStack ())] + Symbol] \*R
yyState = * (yyNTBasePtr [* yyStateStackPtr] + yyNonterminal);
.)b
.pp
The terminal table access is handled as follows.
Instead of implementing the vectors Next and Check as separate arrays,
.\" which would be equivalent to a record of arrays
we used one array of records. The array is called Comb and the records have
two fields called Check and Next. This transformation saves one array access,
because whenever we have computed the address of the Check field we find
the Next field besides it.
.(b
.vs 12
.FT
LOOP
IF Check [Base [State] + Symbol] = State THEN
RETURN Next [Base [State] + Symbol]
ELSE
State := Default [State]
IF State = NoState THEN <error recovery> END
END
END
.sp 0.5
\*R
.)b
.(b
.vs 12
.FT
for (;;) {
typedef struct { int Check, Next; } yyTCombType;
register yyTCombType * TCombPtr;
.sp 0.5
TCombPtr = yyTBasePtr [yyState] + yyTerminal;
if (TCombPtr->Check == yyState) {
yyState = TCombPtr->Next;
break;
};
if ((yyState = yyDefault [yyState]) != yyNoState) continue;
.sp 0.5
< code for error recovery >
}
.)b
.pp
To check for stack overflow we have to add the code below at every read
(terminal) action. Read-reduce and reduce actions can only cause the stacks
to grow by at most one element and therefore don't need an overflow check.
We have implemented the stacks as flexible arrays. In case of stack overflow
the sizes of the arrays are increased automatically. Therefore, from the
user's point of view, stack overflow never occurs.
.(b
.FT
if (yyStateStackPtr >= yyMaxPtr) < allocate larger arrays and copy the contents >
.)b
.pp
Of course we have used the register attribute for the most used variables:
.(b
.FT
yyState, yyTerminal, yyNonterminal, yyStateStackPtr, yyAttrStackPtr, TCombPtr
.)b
.pp
The variables yyTerminal and yyNonterminal have the type \fClong\fP
because they are involved in address computations. These have to be
performed with long arithmetic in C. The \fClong\fP declaration saves
explicit machine instructions for length conversions (at least on MC 68020
processors). The variable yyState has the type \fCshort\fP in order to be
compatible with the type of the table elements. These have the type
\fCshort\fP to keep the table size reasonably small.
.pp
Continue statements have been changed to explicit gotos, because some
compilers don't generate optimal jump instructions.
.pp
The appendix shows an example of a parser generated by \fILalr\fP. The
error recovery, which constitutes most of the code, and some other parts
have been omitted.
Some details may deviate from the above description which concentrated
primarily on the principles. For example all identifiers carry the prefix
\&'yy' to avoid conflicts with identifiers introduced by the user into the
semantic actions.
Also, the sizes of the arrays are greater than really necessary as we used a
non-dense coding for the terminal symbols.
.sh 1 "The Parser Generator"
.pp
This chapter will discuss important features of the generator
\fILalr\fP from the user's point of view.
.sh 2 "Structure of Specification"
.pp
The structure of a parser specification is shown in Figure 1.
There may be five sections to include arbitrary target code,
which may contain declarations to be used in the semantic
actions or statements for initialization and finalization of data structures.
The TOKEN section defines the terminals of the grammar
and their encoding. In the OPER (for operator) section, precedence and
associativity for terminals can be specified to resolve LR-conflicts. The
RULE section contains the grammar rules and semantic actions.
A complete definition of the specification language can be found in the user
manual\*([<\*([[GrV88\*(]]\*(>].
.(b I
.vs 12
.sp 0.5
.FT
EXPORT { external declarations }
GLOBAL { global declarations }
LOCAL { local declarations }
BEGIN { initialization code }
CLOSE { finalization code }
TOKEN coding of terminals
OPER precedence of operators
RULE grammar rules and semantic actions
.ft R
.sz 10
Fig. 1: Structure of Specification
.)b
.sh 2 "Semantic Actions and S-Attribution"
.pp
A parser for practical applications has more to do than just syntax checking:
it must allow some kind of translation of the recognized input. In the
case of LR parsing, action statements can be associated with the productions.
These statements are executed whenever the production is reduced.
.pp
Additionally, during LR parsing it is possible to evaluate an S-attribution
(only synthesized attributes). This mechanism works as follows:
every terminal or nonterminal of the grammar can be associated with an
attribute. After a reduction (that is during the execution of the action
statements) the attributes of the symbols on the right-hand side of the
reduced production can be accessed from an attribute stack using the notation $i.
The action statement can compute an attribute value for the left-hand side
of the production which has to be assigned to the special variable $$.
After executing the action statements, this value
is pushed onto the attribute stack by the parser to reestablish the invariant
of the algorithm. The attribute values for terminals have to be provided by the scanner.
Figure 2 shows an example for the syntax of grammar rules, semantic actions, and
S-attribution.
.(b I
.vs 12
.sp 0.5
.FT
expr : expr '+' expr { $$.value := $1.value + $3.value; } .
expr : expr '*' expr { $$.value := $1.value * $3.value; } .
expr : '(' expr ')' { $$.value := $2.value; } .
expr : number { $$.value := $1.value; } .
.ft R
.sz 10
Fig. 2: Grammar Rules with Semantic Actions and S-Attribution
.)b
.sh 2 "Ambiguous Grammars"
.pp
The grammar of Figure 2 as well as the example in Figure 3 are typical
examples of ambiguous grammars. Like \fIYacc\fP we allow resulting
LR-conflicts to be resolved
by specifying precedence and associativity for terminals in the OPER
section. Figure 4 gives an example. The lines represent increasing levels of
precedence. LEFT, RIGHT, and NONE denote left-associativity,
right-associativity, and no associativity. Rules can inherit the properties
of a terminal with the PREC suffix.
.(b I
.vs 12
.sp 0.5
.FT
stmt : IF expr THEN stmt PREC LOW
| IF expr THEN stmt ELSE stmt PREC HIGH .
.ft R
.sz 10
Fig. 3: Ambiguous Grammar (Dangling Else)
.)b
.(b I
.vs 12
.FT
OPER LEFT '+'
LEFT '*'
NONE LOW
NONE HIGH
.ft R
.sz 10
Fig. 4: Resolution of LR-Conflicts Using Precedence and Associativity
.)b
.sh 2 "LR-Conflict Message"
.(z I
.vs 12
.FT
State 266
.sp 0.5
read reduce conflict
.sp 0.5
\&program End-of-Tokens
\&PROGRAM identifier params ';' block '.'
\&..............................:
\&:
\&labels consts types vars procs BEGIN stmts END
\&.....................................:
\&:
\&stmt
\&IF expr THEN stmt ELSE stmt
:
IF expr THEN stmt
:
reduce stmt -> IF expr THEN stmt. {ELSE} ?
read stmt -> IF expr THEN stmt.ELSE stmt ?
.ft R
.sz 10
Fig. 5: Derivation Tree for an LR-Conflict (Dangling Else)
.)z
.pp
To help a user locate the reason for LR-conflicts, we adopted the method
proposed by\*([<\*([[DeP82\*(]]\*(>]. Besides reporting the type of
the conflict and the involved items (whatever that is for the user) like most
LR parser generators do, the system also prints a derivation tree.
Figure 5 shows an
example. It shows how the items and the look-ahead tokens get into the
conflict situation. In general there can be two trees if the derivations for
the conflicting items are different. Each tree consists of three parts. An
initial part begins at the start symbol of the grammar. At a certain node
(rule) two subtrees explain the emergence of the item and the look-ahead.
.pp
Every line contains a right-hand side of a grammar rule. Usually the
right-hand side is indented to start below the nonterminal of the left-hand
side. To avoid line overflow, dotted edges also refer to the left-hand side
nonterminal and allow a shift back to the left margin. In Figure 5 the
initial tree part consists of 5 lines (not counting the dotted lines). The
symbols \fIstmt\fP and ELSE are the roots of the other two tree parts. This
location is indicated by the "unnecessary" colon in the following line.
After one intermediate line the left subtree derives the conflicting items.
The right subtree consists in this case only of the root node (the terminal
\&ELSE) indicating the look-ahead. In general this can be a tree of
arbitrary size. The LR-conflict can easily be seen from this tree fragment.
If conditional statements are nested as shown, then there is a read reduce
conflict (also called shift reduce conflict).
.sh 2 "Error Recovery"
.(z I
.vs 12
Source Program:
.sp 0.5
.FT
program test (output);
begin
if (a = b] write (a);
end.
.sp 0.5
.sz 10
.r
Error Messages:
.sp 0.5
.FT
3, 13: Error syntax error
3, 13: Information expected symbols: ')' '*' '+' '-' '/' '<' '<=' '=' '<>'
'>' '>=' AND DIV IN MOD OR
3, 15: Information restart point
3, 15: Repair symbol inserted : ')'
3, 15: Repair symbol inserted : THEN
.ft R
.sz 10
Fig. 6: Example of Automatic Error Messages
.)z
.pp
The generated parsers include information and algorithms to handle syntax
errors completely automatically.
We follow the complete backtrack-free method described by
\*([[R\*oh76\*(],R\*oh80\*(],R\*oh82\*(]]
and provide expressive reporting, recovery, and repair. Every incorrect input
is "virtually" transformed into a syntactically correct program with the
consequence of only executing a "correct" sequence of semantic actions.
Therefore the following compiler phases like semantic analysis don't have to
bother with syntax errors. \fILalr\fP provides a prototype error module which
prints messages as shown in Figure 6.
Internally the error recovery works as follows:
.ip - 0.5c
The location of the syntax error is reported.
.ip - 0.5c
All the tokens that would be a legal continuation of the program are
computed and reported.
.ip - 0.5c
All the tokens that can serve to continue parsing are computed. A minimal
sequence of tokens is skipped until one of these tokens is found.
.ip - 0.5c
The recovery location is reported.
.ip - 0.5c
Parsing continues in the so-called repair mode. In this mode the parser
behaves as usual except that no tokens are read from the input. Instead a
minimal sequence of tokens is synthesized to repair the error.
The parser stays in this mode until the input token can be accepted.
The synthesized tokens are reported. The program can be regarded as
repaired, if the skipped tokens are replaced by the synthesized ones.
Upon leaving repair mode, parsing continues as usual.
.sh 1 "Comparison of Parser Generators"
.pp
Finally we present a comparison of \fILalr\fP with some other parser
generators that we have access to. We are comparing the features of the
generators and the performance of the generated parsers. Tables 1 to 4 contain
the results and should be self explanatory.
Besides \fILalr\fP we examine:
.ta 1.5c
.ip - 0.5c
Yacc well known from UNIX\*([<\*([[Joh75\*(]]\*(>]
.ip - 0.5c
Bison public domain remake of \fIYacc\fP\*([<\*([[GNU88\*(]]\*(>]
.ip - 0.5c
PGS Parser Generating System also developed at Karlsruhe\*([<\*([[GrK86\*(],KlM89\*(]]\*(>]
.ip - 0.5c
Ell recursive descent parser generator described in\*([<\*([[Gro88\*(]]\*(>].
.EQ
delim $$
.EN
.(b L
.ce
.sp 0.5
Table 1: Comparison of Specification Techniques for Parser Generators
.sp 0.5
.TS
tab (;) box center;
l | l l l l l
l | l l l l l.
;Bison ;Yacc ;PGS ;Lalr ;Ell
_
specification language;BNF ;BNF ;EBNF ;EBNF ;EBNF
grammar class ;LALR(1);LALR(1);LALR(1);LALR(1);LL(1)
; ; ;LR(1) ; ;
; ; ;SLR(1) ; ;
semantic actions ;yes ;yes ;yes ;yes ;yes
S-attribution ;numeric;numeric;symbolic;numeric;-
L-attribution ;- ;- ;- ;- ;symbolic
.TE
.)b
.pp
.i Lalr ,
PGS, and
.i Ell
accept grammars written in extended BNF whereas
.i Yacc
and
.i Bison
require grammars in BNF. The tools
.i Lalr ,
PGS,
.i Yacc ,
and
.i Bison
process the large class of LALR(1) grammars but can only evaluate an S-attribution during
parsing.
.i Ell
on the other hand processes the smaller class of LL(1) grammars but generates parsers which
are 50% faster (see below) and can evaluate a more powerful L-attribution during parsing.
.(b L
.ce
.sp 0.5
Table 2: Features for Grammar Conflicts and Error Recovery
.sp 0.5
.TS
tab (;) box center;
l | l l l l l
l | l l l l l.
;Bison ;Yacc ;PGS ;Lalr ;Ell
_
conflict message ;state, ;state, ;state, ;derivation-;involved
; items; items; items; tree ; productions
conflict solution;precedence;precedence;precedence;precedence;first rule
;associativity;associativity;associativity;associativity;
; ; ;modification; ;
chain rule elimination;- ;- ;yes ;- ;-
error recovery ;by hand;by hand;automatic;automatic;automatic
error repair ;- ;- ;yes ;yes ;yes
.TE
.)b
.pp
In case of grammar conflicts,
.i Lalr
has the advantage of providing derivation trees to support the location and solution of this
kind of problems. The tools
.i Lalr ,
PGS, and
.i Ell
provide automatic error recovery and repair in case of syntax errors.
.(b L
.ce
.sp 0.5
Table 3: Comparison of Implementation Techniques and Languages
.sp 0.5
.TS
tab (;) box center;
l | l l l l l
l | l l l l l.
;Bison ;Yacc ;PGS ;Lalr ;Ell
_
parsing method ;table-driven;table-driven;table-driven;table-driven;recursive
; ; ; ; ; descent
table compression;comb-vector;comb-vector;comb-vector;comb-vector;-
_
implementation languages;C ;C ;Pascal ;C ;C
; ; ; ;Modula ;Modula
target languages ;C ;C ;C ;C ;C
; ; ;Modula ;Modula ;Modula
; ; ;Pascal
; ; ;Ada
.TE
.)b
.pp
All the LALR(1) tools generate table-driven parsers and use the comb-vector technique for
table compression.
.i Ell
produces directly coded recursive descent parsers. Whereas
.i Yacc
and
.i Bison
are implemented in C and generate C code, the sources of
.i Lalr
and
.i Ell
exist in Modula-2 and in C and the tools generate Modula-2 as well as C.
PGS is implemented in Pascal and generates parsers in even more languages.
.pp
The parser measurements (Table 4) exclude time and size for scanning and refer
to experiments with Modula-2 parsers.
The grammar for the LALR(1) tools had 69 terminals, 52 nonterminals,
and 163 productions. The grammar for \fIEll\fP was in extended BNF and
contained 69 terminals, 45 nonterminals, and 91 productions. The timings
result from analyzing a big Modula-2 program containing 28,302 tokens,
7867 lines, or 193,862 characters with the generated parsers.
For this input the parser generated from \fILalr\fP executed
13,827 read, 14,476 read-terminal-reduce, 11,422 read-nonterminal-reduce,
and 18,056 reduce actions. The terminal Table TTable was accessed 46,358
times and the nonterminal Table NTTable 43,953 times.
.(b L
.ce
.sp 0.5
Table 4: Comparison of Parser Speeds and Sizes
.sp 0.5
.TS
tab (;) box center;
l | r r r r r
l | r r r r r.
;Bison;Yacc;PGS;Lalr;Ell
_
speed [$10 sup 3$ tokens/sec.];8.93;15.94;17.32;34.94;54.64
speed [$10 sup 3$ lines/min.];150;270;290;580;910
_
table size [bytes];7,724;9,968;9,832;9,620;-
parser size [bytes];10,900;12,200;14,140;16,492;18,048
_
generation time [sec.];4.92;17.28;51.04;27.46;10.48
.TE
.)b
.EQ
delim off
.EN
.pp
In the presented experiment \fILalr\fP generated parsers are twice as fast
as those of \fIYacc\fP. In general, we observed speed-ups between two and
three depending on the grammar. The sizes of the parse tables are nearly the
same in all cases.
The parser generated by \fILalr\fP is 35% larger than the \fIYacc\fP
parser, mainly because of the code for error recovery. The generation times
vary widely. The reasons can be found in the different algorithms for
computing the look-ahead sets and the quality of the implementation.
\fILalr\fP spends a considerable amount of time in printing the derivation
trees for LR-conflicts.
PGS generated parsers turned out to be faster in comparison to
.i Yacc ,
but
.i Bison
performed considerably slower. Parsers generated by
.i Ell
were found to be the fastest exceeding the speed of
.i Lalr
by 50%.
.pp
The measurements of the parser speed turned out to be a hairy business.
The results can be influenced in many ways from:
.ip - 0.5c
The hardware: we used a PCS Cadmus 9900 with a MC68020 processor running
at a clock rate of 16.7 MHz.
.ip - 0.5c
The loader: our timings include the time to load the parser which is almost
zero.
.ip - 0.5c
The compiler: we used the C compiler of PCS.
.ip - 0.5c
The language: we used Modula-2.
.ip - 0.5c
The size of the language: in the case of \fILalr\fP the size of the
language or the size of the grammar does not influence the speed of the
parser because the same table-driven algorithm and the same data structure
is used in every case. This can be different for other parsers. For example
the speed of directly coded parsers decreases with the grammar size.
One reason is that linear or binary-tree search is done to determine
the next action. Another reason can be that long jump instructions become
necessary instead of short ones.
PGS stores states in one byte if there are less than 256 states and in
two bytes otherwise. This decreases the speed for large grammars, too,
at least on byte-addressable machines.
.ip - 0.5c
The grammar style, the number of rules, especially chain rules and the
like: we used the same grammar except for \fIEll\fP which had as few chain
rules as possible and which caused as few reduce actions as possible. This
means e. g. we specified expressions in an ambiguous style like shown in
Figure 2.
.\" Exceptions are \fIEll\fP which needs an LL(1) grammar and PGS, because
.\" modifications are inelegant to resolve many ambiguities.
.ip - 0.5c
The test input: we used the same large Modula program as test data in
every case, of course.
Nevertheless the programming style or the code "density" influence the
resulting speed when it is measured in lines per minute.
.ip - 0.5c
The timing: we measured CPU-time and subtracted the scanner time from the
total time (scanner and parser) to get the parser time. We used the UNIX
shell command \fItime\fP to get the time and added user and system time.
.ip - 0.5c
The measure: we selected tokens per second as well as lines per minute as
measures. The first one is independent of the density of the input and
therefore more exact. The second one has been used by other authors and it
is more expressive for a user.
.ip - 0.5c
The semantic actions: we specified empty semantic actions for all rules in
order to simulate the conditions in a realistic application. This has more
consequences than one might think. It disables a short cut of \fIYacc\fP and
the chain rule elimination\*([<\*([[WaG84\*(]]\*(>] of PGS, decreasing the speed in
both cases.
.\" A further experiment with PGS revealed even more problems. To allow chain
.\" rule elimination we deleted the empty semantic actions for chain rules.
.\" Surprisingly, instead of getting faster the parser was slower. The reason is
.\" that chain rule elimination increases the number of states. Accidentally we
.\" exceeded the number of 256. Now states have to be stored in two bytes instead
.\" of one. The additional memory accesses are more expensive than the win from
.\" the chain rule elimination.
.pp
Our conclusion from the numerous problems with the measurement of
parser speed is that results from different environments or from different
people can not be compared. There are too many conditions that influence the
results and usually only a few of these conditions are reported.
.sh 1 "Conclusion"
.pp
We showed that table-driven, portable LR(1) parsers can be implemented
efficiently in C or similar languages. Following the presented ideas the
parser generator \fILalr\fP generates parsers that are two to three times
faster than parsers generated by \fIYacc\fP. They are very fast and reach a
speed of 35,000 tokens per second or 580,000 lines per minute.
The generated parsers have all the features needed for practical
applications such as semantic actions, S-attribution, and error recovery.
.pp
We have shown how to develop an efficient parsing algorithm in a systematic
way. The starting point was a basic algorithm from a textbook. In a
stepwise manner we turned it into a relatively short yet efficient algorithm
mainly using pseudo code. Target code was introduced only in the last step.
.pp
We presented the main features of the parser generator \fILalr\fP
from the user's point of view. A valuable feature of \fILalr\fP is that it
prints a derivation tree in case of LR-conflicts to aid the location of
the problem.
We finally compared the features and performance of some parser generators.
.\" The description of the implementation shows how the performance is achieved.
.\" .pp
.\" We hope to have convinced the reader that this way of program development
.\" works and produces fast and compact code. Unfortunately the story told is not
.\" true. We did arrive at the presented solution and it does have the described
.\" properties. The stepwise development shown in chapter 2 would have been the
.\" right way. But in reality our path of development was a completely different
.\" and winding one. We tried to transfer the implementation of a table-driven
.\" scanner to a parser. A first paper about an early version of the parsing
.\" algorithm regarded the underlying principle completely different and
.\" contained some errors. After fixing the errors we finally discovered that
.\" the interpretation given in this paper is the right one and an ideal
.\" development should have been following the steps given in chapter 2.
.uh Acknowledgements
.pp
Whereas the author contributed the parser skeletons in C and Modula-2, the
generator program \fILalr\fP was written and debugged by Bertram Vielsack
who also provided the experimental results.
Valuable suggestions to improve this paper are due to Kai Koskimies and
Nigel Horspool.
Nick Graham deserves many thanks for transforming my text into idiomatic
English.
.fi
.sz 12
.[]
.[-
.ds [F ASU86
.ds [A A\*(p]\*(a]V\*(p] Aho
.as [A \*(c]R\*(p] Sethi
.as [A \*(m]J\*(p]\*(a]D\*(p] Ullman
.ds [T Compilers: Principles, Techniques, and Tools
.ds [I Addison Wesley
.ds [C Reading, M\&A
.ds [D 1986
.][
.[-
.ds [F Ass88
.ds [A W\*(p] Assmann
.ds [A W. A\\*smann
.ds [T A Short Review of High Speed Compilation
.ds [V 371
.ds [J LNCS
.ds [C Berlin
.ds [I Springer Verlag
.nr [P 1
.ds [P 1-10
.ds [D Oct. 1988
.][
.[-
.ds [F DeP82
.ds [A F\*(p] DeRemer
.as [A \*(n]T\*(p] Pennello
.ds [T Efficient Computation of LALR(1) Look-Ahead Sets
.nr [P 1
.ds [P 615-649
.ds [J ACM Trans. Prog. Lang. and Systems
.ds [V 4
.ds [N 4
.ds [D Oct. 1982
.][
.[-
.ds [F GNU88
.ds [A GNU\ Project
.ds [T Bison - Manual Page
.ds [I Public Domain Software
.ds [D 1988
.][
.[-
.ds [F GrK86
.ds [A J\*(p] Grosch
.as [A \*(n]E\*(p] Klein
.ds [T User Manual for the PGS-System
.ds [I GMD Forschungsstelle an der Universit\\*:at Karlsruhe
.ds [D Aug. 1986
.][
.[-
.ds [F Gro88
.ds [A J\*(p] Grosch
.ds [T Generators for High-Speed Front-Ends
.ds [V 371
.ds [J LNCS
.ds [C Berlin
.ds [I Springer Verlag
.nr [P 1
.ds [P 81-92
.ds [D Oct. 1988
.][
.[-
.ds [F GrV88
.ds [A J\*(p] Grosch
.as [A \*(n]B\*(p] Vielsack
.ds [T The Parser Generators Lalr and Ell
.ds [I GMD Forschungsstelle an der Universit\\*:at Karlsruhe
.ds [R Compiler Generation Report No. 8
.ds [N 8
.ds [D Apr. 1988
.][
.[-
.ds [F HoW89
.ds [A R\*(p]\*(a]N\*(p] Horspool
.as [A \*(n]M\*(p] Whitney
.ds [T Even Faster LR Parsing
.ds [I University of Victoria, Department of Computer Science
.ds [R Report Dept. of Computer Science-114-IR
.ds [D May 1989
.][
.[-
.ds [F Joh75
.ds [A S\*(p]\*(a]C\*(p] Johnson
.ds [T Yacc \(em Yet Another Compiler-Compiler
.ds [R Computer Science Technical Report 32
.ds [I Bell Telephone Laboratories
.ds [C Murray Hill, NJ
.ds [D July 1975
.][
.[-
.ds [F KlM89
.ds [A E\*(p] Klein
.as [A \*(n]M\*(p] Martin
.ds [T The Parser Generating System PGS
.ds [J Software\(emPractice & Experience
.ds [V 19
.ds [N 11
.nr [P 1
.ds [P 1015-1028
.ds [D Nov. 1989
.][
.[-
.ds [F Pen86
.ds [A T\*(p]\*(a]J\*(p] Pennello
.ds [T Very Fast LR Parsing
.ds [J SI\&GPLAN Notices
.ds [V 21
.ds [N 7
.nr [P 1
.ds [P 145-151
.ds [D July 1986
.][
.[-
.ds [F R\*oh76
.ds [A J\*(p] R\\*:ohrich
.ds [T Syntax-Error Recovery in LR-Parsers
.ds [E H\*(p] Schneider
.ds [E H.-J\*(p] Schneider
.as [E \*(n]M\*(p] Nagl
.nr [E 2
.ds [S Programmiersprachen, 4. Fachtagung der GI, Erlangen
.ds [B Informatik-Fachberichte
.ds [V 1
.nr [P 1
.ds [P 175-184
.ds [C Berlin
.ds [I Springer Verlag
.ds [D 1976
.][
.[-
.ds [F R\*oh80
.ds [A J\*(p] R\\*:ohrich
.ds [T Methods for the Automatic Construction of Error Correcting Parsers
.ds [J Acta Inf.
.ds [V 13
.ds [N 2
.nr [P 1
.ds [P 115-139
.ds [D 1980
.][
.[-
.ds [F R\*oh82
.ds [A J\*(p] R\\*:ohrich
.ds [T Behandlung syntaktischer Fehler
.ds [J Informatik Spektrum
.ds [V 5
.ds [N 3
.nr [P 1
.ds [P 171-184
.ds [D 1982
.][
.[-
.ds [F WaG84
.ds [A W\*(p]\*(a]M\*(p] Waite
.as [A \*(n]G\*(p] Goos
.ds [T Compiler Construction
.ds [I Springer Verlag
.ds [C New York, NY
.ds [D 1984
.][
.[-
.ds [F WhH88
.ds [A M\*(p] Whitney
.as [A \*(n]R\*(p]\*(a]N\*(p] Horspool
.ds [T Extremely Rapid LR Parsing
.ds [E D\*(p] Hammer
.nr [E 1
.ds [B Proceedings of the Workshop on Compiler Compiler and High Speed Compilation
.ds [C Berlin, GDR
.nr [P 1
.ds [P 248-257
.ds [D Oct. 1988
.][
.bp
.lp
.uh "Appendix"
.vs 12
.sp
.ce
Example of a Generated Parser
.sp
.nf
Grammar:
.sp
.FT
.\" TOKEN
.\" 'i' = 105
.\" 'n' = 110
.\" '=' = 61
.\" '+' = 43
.\" '-' = 45
.\" '*' = 42
.\" '/' = 47
.\" '(' = 40
.\" ')' = 41
.\"
.\" OPER
.\" LEFT '+' '-'
.\" LEFT '*' '/'
.\"
.\" RULE
L : L S | .
S : 'i' '=' E .
E : E '+' E
| E '-' E
| E '*' E
| E '/' E
| 'i'
| 'n'
| '(' E ')' .
.sp
.sz 10
.r
Parser:
.sp
.FT
.\" typedef int tScanAttribute;
.\" typedef int bool;
.\" # define false 0
.\" tScanAttribute Attribute;
.\"
# include "DynArray.h"
.sp 0.5
# define yyInitStackSize 100
# define yyNoState 0
# define yyTableMax 122
# define yyNTableMax 119
# define yyLastReadState 13
# define yyFirstReadReduce 14
# define yyFirstReduce 20
# define yyStartState 1
# define yyStopState 20
.sp 0.5
typedef short yyStateRange;
typedef struct { yyStateRange Check, Next; } yyTCombType;
typedef struct { tScanAttribute Scan; } tParsAttribute;
.sp 0.5
static yyTCombType * yyTBasePtr [yyLastReadState + 1];
static yyStateRange * yyNBasePtr [yyLastReadState + 1];
static yyStateRange yyDefault [yyLastReadState + 1];
static yyTCombType yyTComb [yyTableMax + 1];
static yyStateRange yyNComb [yyNTableMax + 1];
static int yyErrorCount;
.sp 0.5
int Parse ()
{
register yyStateRange yyState ;
register long yyTerminal ;
register yyStateRange * yyStateStackPtr ;
register tParsAttribute * yyAttrStackPtr ;
register bool yyIsRepairing ;
unsigned long yyStateStackSize = yyInitStackSize;
unsigned long yyAttrStackSize = yyInitStackSize;
yyStateRange * yyStateStack ;
tParsAttribute * yyAttributeStack;
tParsAttribute yySynAttribute ; /* synthesized attribute */
yyStateRange * yyMaxPtr;
.sp 0.5
yyState = yyStartState;
yyTerminal = GetToken ();
MakeArray (& yyStateStack, & yyStateStackSize, sizeof (yyStateRange));
MakeArray (& yyAttributeStack, & yyAttrStackSize, sizeof (tParsAttribute));
yyMaxPtr = & yyStateStack [yyStateStackSize];
yyStateStackPtr = yyStateStack;
yyAttrStackPtr = & yyAttributeStack [1];
yyErrorCount = 0;
yyIsRepairing = false;
.sp 0.5
ParseLoop:
for (;;) {
if (yyStateStackPtr >= yyMaxPtr) { /* stack overflow? */
int StateStackPtr = yyStateStackPtr - yyStateStack;
int AttrStackPtr = yyAttrStackPtr - yyAttributeStack;
ExtendArray (& yyStateStack, & yyStateStackSize, sizeof (yyStateRange));
ExtendArray (& yyAttributeStack, & yyAttrStackSize, sizeof (tParsAttribute));
yyStateStackPtr = yyStateStack + StateStackPtr;
yyAttrStackPtr = yyAttributeStack + AttrStackPtr;
yyMaxPtr = & yyStateStack [yyStateStackSize];
};
* yyStateStackPtr = yyState;
.sp 0.5
TermTrans:
for (;;) { /* SPEC State = Table (State, Terminal); terminal transition */
register yyStateRange * TCombPtr;
.sp 0.5
TCombPtr = (yyStateRange *) (yyTBasePtr [yyState] + yyTerminal);
if (* TCombPtr ++ == yyState) { yyState = * TCombPtr; break; };
if ((yyState = yyDefault [yyState]) != yyNoState) goto TermTrans;
.sp 0.5
/* error recovery */
};
.sp 0.5
if (yyState >= yyFirstReadReduce) { /* reduce or read-reduce? */
if (yyState < yyFirstReduce) { /* read-reduce */
yyStateStackPtr ++;
(yyAttrStackPtr ++)->Scan = Attribute;
yyTerminal = GetToken ();
yyIsRepairing = false;
};
.sp 0.5
for (;;) {
register long yyNonterminal;
.sp 0.5
switch (yyState) {
case yyStopState: /* S' : L End-of-Tokens . */
ReleaseArray (& yyStateStack, & yyStateStackSize, sizeof (yyStateRange));
ReleaseArray (& yyAttributeStack, & yyAttrStackSize, sizeof (tParsAttribute));
return yyErrorCount;
case 21:
case 19: /* L : L S . */
yyStateStackPtr -= 2; yyAttrStackPtr -= 2; yyNonterminal = 111; break;
case 22: /* L : . */
yyStateStackPtr -= 0; yyAttrStackPtr -= 0; yyNonterminal = 111; break;
case 23: /* S : 'i' '=' E . */
yyStateStackPtr -= 3; yyAttrStackPtr -= 3; yyNonterminal = 112; break;
case 24: /* E : E '+' E . */
yyStateStackPtr -= 3; yyAttrStackPtr -= 3; yyNonterminal = 113; break;
case 25: /* E : E '-' E . */
yyStateStackPtr -= 3; yyAttrStackPtr -= 3; yyNonterminal = 113; break;
case 26:
case 17: /* E : E '*' E . */
yyStateStackPtr -= 3; yyAttrStackPtr -= 3; yyNonterminal = 113; break;
case 27:
case 18: /* E : E '/' E . */
yyStateStackPtr -= 3; yyAttrStackPtr -= 3; yyNonterminal = 113; break;
case 28:
case 14: /* E : 'i' . */
yyStateStackPtr -= 1; yyAttrStackPtr -= 1; yyNonterminal = 113; break;
case 29:
case 15: /* E : 'n' . */
yyStateStackPtr -= 1; yyAttrStackPtr -= 1; yyNonterminal = 113; break;
case 30:
case 16: /* E : '(' E ')' . */
yyStateStackPtr -= 3; yyAttrStackPtr -= 3; yyNonterminal = 113; break;
};
.sp 0.5
/* SPEC State = Table (Top (), Nonterminal); nonterminal transition */
yyState = * (yyNBasePtr [* yyStateStackPtr ++] + yyNonterminal);
* yyAttrStackPtr ++ = yySynAttribute;
if (yyState < yyFirstReadReduce) goto ParseLoop;
}; /* read-reduce? */
.sp 0.5
} else { /* read */
yyStateStackPtr ++;
(yyAttrStackPtr ++)->Scan = Attribute;
yyTerminal = GetToken ();
yyIsRepairing = false;
};
};
};
.bp 1
.lp
.b Contents
.sp
.xp